Savannah Social Data Analysis

By: Emily Amspoker, Fall 2025

Importing Libraries

library(sf)
library(tidyverse)
library(ggplot2)
library(readxl)
library(plotly)
library(dplyr)
library(foreign)
library(ggiraph)
library(patchwork)
library(gender)
library (osmdata)
library(httr)
library(xmlr)
library(xml2)
library(jsonlite)
library(tidytext)
library(tm)
library(SnowballC)
library(wordcloud)

Importing and Cleaning Data

Importing Demographic Data

# convert time at residence/in Savannah to double
demographic_data <- read_csv("Data1_DemographicData.csv", show_col_types = FALSE) %>% 
  mutate (ID = `ID Number`, 
          residence_time = as.double(str_extract(`How long have you lived at your residence?`, "[^ years]*")), 
          savannah_time =  as.double(str_extract(`How long have you lived in Savannah?`, "[^ years]*") )) %>%
  select(-`How long have you lived at your residence?`, -`How long have you lived in Savannah?`)

Displaying Distributions of Relevant Demographic Data (Percentages or Average/Min/Max/Standard Deviation)

demographic_data_for_table <- select(demographic_data, -Timestamp, -ID, -`ID Number`, -City, -State, - `Cross Street`, -`Accuracy`, -`Longitude`, -`Latitude`, -`How many people live in your home with you?`)

print_stats <- function(variable) {
  if (is.character(demographic_data[[variable]]))
  {
    factor_version <- factor(demographic_data[[variable]])
    print(paste(variable, " MEAN"))
    print(mean(as.numeric(factor_version) - 1))
    counts <- demographic_data %>% group_by_at(variable) %>%
      summarize(count=n(), .groups="drop") %>%
      mutate(percent = 100*(count/nrow(demographic_data)))
    print(counts)

    
  } else {
    print(paste(variable," STATS"))
    print(paste(mean(demographic_data[[variable]]) , " & ", min(demographic_data[[variable]]) , "-" , max(demographic_data[[variable]]) , " (st. dev. = " , sd(demographic_data[[variable]]) , ")", sep=""))
    
  }
}
nrow(demographic_data)
## [1] 75
for (variable in names(demographic_data_for_table))
{
  print_stats(variable)
}
## [1] "Do you own or have access to a car?  MEAN"
## [1] 0.84
## # A tibble: 2 × 3
##   `Do you own or have access to a car?` count percent
##   <chr>                                 <int>   <dbl>
## 1 No                                       12      16
## 2 Yes                                      63      84
## [1] "Do you drive?  MEAN"
## [1] 0.92
## # A tibble: 2 × 3
##   `Do you drive?` count percent
##   <chr>           <int>   <dbl>
## 1 No                  6       8
## 2 Yes                69      92
## [1] "HHSize  STATS"
## [1] "2.41333333333333 & 1-6 (st. dev. = 1.39587552040591)"
## [1] "What is your age range?  MEAN"
## [1] 1.333333
## # A tibble: 4 × 3
##   `What is your age range?` count percent
....

Importing Social Network Data, Open-Ended Data

social_network_data <- read_excel("Data2_SocialNetworkData_CA.xlsx")
open_ended_data <- read_excel("Data3_OpenResponse.xlsx")

# Group the "Friend" and "Frienship" category together
gis_data <- read.dbf("AllPoints_V11.dbf", as.is=TRUE) %>%
             mutate(RELATIONSH = ifelse(RELATIONSH=="Friend", "Friendship", RELATIONSH))
gis_data
##      SOCIALNETW                                           CHECK SOCIALNE_1
## 1          1025                                         Correct      10071
## 2          1319                                         Correct      10095
## 3          1320                                         Correct      10095
## 4          1321                                         Correct      10095
## 5          1322                                         Correct      10095
## 6           459                                         Correct      10049
## 7           460                                         Correct      10049
## 8            87                                         Correct      10048
## 9            88                                         Correct      10048
## 10           89                                         Correct      10048
## 11          486 Different from Google - needs to be re-geocoded      10064
## 12          487                                         Correct      10064
## 13          488                                         Correct      10064
## 14          926                                         Correct      10072
## 15          927                                    Clio Updated      10072
## 16          928                                         Correct      10072
## 17          995                                         Correct      10071
## 18          996                                         Correct      10071
## 19          997                                            <NA>      10071
....
all_data <- left_join(left_join(demographic_data, social_network_data, by="ID Number"), open_ended_data, by="ID Number")
all_data
## # A tibble: 75 × 32
##    Timestamp  `ID Number` City  State `Cross Street` Accuracy Latitude Longitude
##    <chr>            <dbl> <chr> <chr> <chr>          <chr>       <dbl>     <dbl>
##  1 8/22/2023…       10025 Sava… Geor… Montgomery cr… interse…     32.0     -81.1
##  2 8/22/2023…       10028 Sava… Geor… 512 Abercorn … Rooftop      32.1     -81.1
##  3 9/26/2023…       10029 Sava… Geor… Reynolds & wa… Cross S…     32.0     -81.1
##  4 7/12/2023…       10038 Sava… Geor… 917 Bubbedge … Rooftop      32.1     -81.1
##  5 8/22/2023…       10039 Sava… Geor… 7345 Grant St  Rooftop      32.0     -81.1
##  6 9/19/2023…       10040 Sava… Geor… 208 Ford aven… Street …     32.0     -81.3
##  7 8/16/2023…       10041 Sava… Geor… Johnny Mercer… Street …     32.0     -81.0
##  8 9/19/2023…       10042 Sava… Geor… Park & Harmon  Street …     32.1     -81.1
##  9 7/18/2023…       10043 Sava… Geor… Skylark Road,… Street …     32.1     -81.3
## 10 7/12/2023…       10044 Sava… Geor… 313 West Hall… interse…     32.1     -81.1
## # ℹ 65 more rows
## # ℹ 24 more variables: `Do you own or have access to a car?` <chr>,
## #   `Do you drive?` <chr>, `How many people live in your home with you?` <chr>,
## #   HHSize <dbl>, `What is your age range?` <chr>,
## #   `What gender do you identify with?` <chr>,
## #   `What race/ethnicity do you identify with?` <chr>,
## #   `Disabled Community?` <chr>, `LGBTQ+ Community?` <chr>, ID <dbl>, …
....

Displaying Counts of POIs by Relationships

poi_counts_by_relationships <- gis_data %>%
                                group_by(RELATIONSH) %>%
                                summarize (poi_count = n()) %>%
                                select(RELATIONSH, poi_count)
poi_counts_by_relationships
## # A tibble: 5 × 2
##   RELATIONSH   poi_count
##   <chr>            <int>
## 1 Family             573
## 2 Friendship         398
## 3 Other               50
## 4 Professional        45
## 5 Romantic           118

Importing Map Data

sav_map <- read_sf("ZIP_Codes.geojson")

Adding New Columns

Adding Guesses for Gender Based on Name, Relationship Gender Variable

map_gender_relationships <- function(participant_gender, relationship_gender) {
  if (participant_gender == "Female" && relationship_gender == "F")
  {
    "F-F"
  } else if (participant_gender == "Male" && relationship_gender == "M") {
    "M-M"
  } else if (participant_gender == "Female" && relationship_gender == "M")
  {
    "F-M"
  } else if (participant_gender == "Male" && relationship_gender == "F")
  {
    "M-F"
  } else if (relationship_gender == "M")
  {
    "N-M"
  } else if (relationship_gender == "F") {
    "N-F"
  } else if (participant_gender == "Female") {
    "F-U"
  } else if (participant_gender == "Male") {
    "M-U"
  } else {
    "N-U"
  }
}


map_gender_guess <- function(relationship_gender, person) {
  name <- unlist(strsplit(person, split=' '))[1]
  name <- unlist(strsplit(name, split=')'))[1]
  guess <- gender(name, method="ssa")
  if (nrow(guess) > 0 && relationship_gender == "Undefined from relationship")
  {
    if (guess$proportion_female > 0.99)
    {
      "F"
    } else if (guess$proportion_male > 0.99) {
      "M"
    } else {
      relationship_gender
    }
  }
  else {
    relationship_gender
  }
}


gis_demographic_data <- gis_data %>% 
                  left_join(select(demographic_data, ID, `What gender do you identify with?`), 
                            by = c("SOCIALNE_1" = "ID"))
gis_demographic_data
##      SOCIALNETW                                           CHECK SOCIALNE_1
## 1          1025                                         Correct      10071
## 2          1319                                         Correct      10095
## 3          1320                                         Correct      10095
## 4          1321                                         Correct      10095
## 5          1322                                         Correct      10095
## 6           459                                         Correct      10049
## 7           460                                         Correct      10049
## 8            87                                         Correct      10048
## 9            88                                         Correct      10048
## 10           89                                         Correct      10048
## 11          486 Different from Google - needs to be re-geocoded      10064
## 12          487                                         Correct      10064
## 13          488                                         Correct      10064
## 14          926                                         Correct      10072
## 15          927                                    Clio Updated      10072
## 16          928                                         Correct      10072
## 17          995                                         Correct      10071
## 18          996                                         Correct      10071
## 19          997                                            <NA>      10071
....
gis_gender_data <- gis_demographic_data %>%
                  mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, GENDER), map_gender_relationships)))


gis_gender_data_guess <- gis_demographic_data %>%
                  mutate(gender_guess = unlist(pmap(list(GENDER, PERSON_WIT), map_gender_guess))) %>%
                   mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, gender_guess), map_gender_relationships)))

gis_gender_data_guess
##      SOCIALNETW                                           CHECK SOCIALNE_1
## 1          1025                                         Correct      10071
## 2          1319                                         Correct      10095
## 3          1320                                         Correct      10095
## 4          1321                                         Correct      10095
## 5          1322                                         Correct      10095
## 6           459                                         Correct      10049
## 7           460                                         Correct      10049
## 8            87                                         Correct      10048
## 9            88                                         Correct      10048
## 10           89                                         Correct      10048
## 11          486 Different from Google - needs to be re-geocoded      10064
## 12          487                                         Correct      10064
## 13          488                                         Correct      10064
## 14          926                                         Correct      10072
## 15          927                                    Clio Updated      10072
## 16          928                                         Correct      10072
## 17          995                                         Correct      10071
## 18          996                                         Correct      10071
## 19          997                                            <NA>      10071
....

Visualization of Relationship Gender Variable, with Map and Relationship Type

library(RColorBrewer)

gis_gender_data_guess_filtered <- gis_gender_data_guess %>% filter(LAT > 31.0 & LAT < 32.7)
gis_gender_data_guess %>%
  ggplot(aes(x=RELATIONSH, fill=relationship_gender)) +
  scale_fill_brewer(palette = "Dark2") +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0))

p1 <- gis_gender_data_guess_filtered %>%
  filter(relationship_gender=="F-F" | relationship_gender=="F-M" | relationship_gender=="M-F" | relationship_gender=="M-M") %>%
  ggplot(aes(x=RELATIONSH, fill=relationship_gender, data_id=relationship_gender)) +
   scale_fill_brewer(palette = "Dark2") +
  geom_bar_interactive(position = "dodge") +  theme(axis.text.x = element_text(angle = 90, vjust = 0))


p2 <- sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point_interactive(data=gis_gender_data_guess_filtered %>% filter(relationship_gender == "F-F" | relationship_gender == "F-M" |relationship_gender == "M-F"  | relationship_gender == "M-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, data_id=relationship_gender, tooltip = UNIQUE)) +
   scale_color_brewer(palette = "Dark2") +  theme(axis.text.x = element_text(angle = 90, vjust = 0))


hover_css <- "
  filter: brightness(75%);
  cursor: pointer;
  transition: all 0.5s ease-out;
  filter: brightness(1.15);
"


combined_plot <- p1 / p2 + plot_layout(nrow = 2)

interactive_plot <- girafe(ggobj = combined_plot) %>%
                    girafe_options(
                      opts_hover(css = hover_css),
                     opts_hover_inv(css = "opacity:0.1; transition: all 0.2s ease-out;"),
                     opts_sizing(rescale = TRUE)
                    )

interactive_plot
htmltools::save_html(interactive_plot, "interactive_map.html")

Chi-Squared Tests and Mosaic Plots For Gender of Reported Relationships

total_participants <- nrow(demographic_data)-1

f_f <- sum(gis_gender_data_guess$relationship_gender == "F-F")
m_m <- sum(gis_gender_data_guess$relationship_gender == "M-M")

m_f <- sum(gis_gender_data_guess$relationship_gender == "M-F")
f_m <- sum(gis_gender_data_guess$relationship_gender == "F-M")

reported_relationships <- as.table(rbind(c(m_m, m_f), c(f_m, f_f)))

dimnames(reported_relationships) <- list(gender_relationship = c("M", "F"),
                    gender_participant = c("M", "F"))

Xsq <- chisq.test(reported_relationships)
Xsq$observed   # observed counts (same as M)
##                    gender_participant
## gender_relationship   M   F
##                   M  82  71
##                   F 307 567
Xsq$expected   # expected counts under the null
##                    gender_participant
## gender_relationship         M         F
##                   M  57.95229  95.04771
##                   F 331.04771 542.95229
Xsq$residuals  # Pearson residuals
##                    gender_participant
## gender_relationship         M         F
##                   M  3.158919 -2.466624
##                   F -1.321687  1.032031
Xsq$stdres     # standardized residuals
##                    gender_participant
## gender_relationship         M         F
##                   M  4.344531 -4.344531
##                   F -4.344531  4.344531
Xsq$p.value
## [1] 2.097986e-05
mosaicplot(reported_relationships, las = 1, shade=TRUE)

Chi-Squared Tests and Mosaic Plots For Gender Coding of Open-Ended Questions

spend_time <- read_csv("Spend_Time_CLOSED_Coding.csv", show_col_types = FALSE)
more_places <- read_csv("More_Places_CLOSED_Coding.csv", show_col_types = FALSE)

spend_time_merged <- spend_time %>%
  left_join(select(demographic_data, ID, `What gender do you identify with?`), by = "ID")
more_places_merged <- more_places %>%
  left_join(select(demographic_data, ID, `What gender do you identify with?`), by = "ID")

filter(spend_time_merged, is.na(Code))
## # A tibble: 0 × 6
## # ℹ 6 variables: ID <dbl>, Response <chr>, Person <chr>, Code <chr>,
## #   Notes <chr>, What gender do you identify with? <chr>
spend_time_guess <- spend_time_merged %>%
                  mutate(gender_guess = unlist(pmap(list(Code, Person), map_gender_guess))) %>%
                   mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, gender_guess), map_gender_relationships)))

spend_time_guess %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender")

spend_time_guess %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender (Guess)")

spend_time_guess
## # A tibble: 81 × 8
##       ID Response         Person Code  Notes What gender do you i…¹ gender_guess
##    <dbl> <chr>            <chr>  <chr> <chr> <chr>                  <chr>       
##  1 10025 Her cousins      (Cous… U     <NA>  Female                 U           
##  2 10028 Haley. She has … Haley  F     <NA>  Female                 F           
##  3 10029 My old coworker… (Form… U     <NA>  Female                 U           
##  4 10038 Her Fiance Char… Charl… M     <NA>  Female                 M           
##  5 10040 Grandkids- they… (Gran… U     <NA>  Female                 U           
##  6 10041 Family (Mother … (Moth… F     <NA>  Female                 F           
##  7 10041 Family (Mother … (Fath… M     <NA>  Female                 M           
##  8 10042 Grandparents- T… (Gran… U     <NA>  Female                 U           
##  9 10043 Her sister, Nic… Nicol… F     <NA>  Female                 F           
## 10 10044 Would like to r… Miche… Unde… <NA>  Female                 F           
## # ℹ 71 more rows
## # ℹ abbreviated name: ¹​`What gender do you identify with?`
## # ℹ 1 more variable: relationship_gender <chr>
total_participants <- nrow(demographic_data)-1

f_f <- sum(spend_time_guess$relationship_gender == "F-F")
m_m <- sum(spend_time_guess$relationship_gender == "M-M")

m_f <- sum(spend_time_guess$relationship_gender == "M-F")
f_m <- sum(spend_time_guess$relationship_gender == "F-M")

spend_time_table <- as.table(rbind(c(m_m, m_f), c(f_m, f_f)))

dimnames(spend_time_table) <- list(gender_relationship = c("M", "F"),
                    gender_participant = c("M", "F"))

Xsq <- chisq.test(spend_time_table)
Xsq$observed   # observed counts (same as M)
##                    gender_participant
## gender_relationship  M  F
##                   M  1  5
##                   F 14 31
Xsq$expected   # expected counts under the null
##                    gender_participant
## gender_relationship         M         F
##                   M  1.764706  4.235294
##                   F 13.235294 31.764706
Xsq$residuals  # Pearson residuals
##                    gender_participant
## gender_relationship          M          F
##                   M -0.5756497  0.3715803
##                   F  0.2101975 -0.1356819
Xsq$stdres     # standardized residuals
##                    gender_participant
## gender_relationship          M          F
##                   M -0.7294087  0.7294087
##                   F  0.7294087 -0.7294087
Xsq$p.value
## [1] 0.8006642
mosaicplot(spend_time_table, las = 1, shade=TRUE)

filter(more_places_merged, is.na(Gender))
## # A tibble: 0 × 5
## # ℹ 5 variables: ID <dbl>, Response <chr>, Person <chr>, Gender <chr>,
## #   What gender do you identify with? <chr>
more_places_guess <- more_places_merged %>%
                 mutate(gender_guess = unlist(pmap(list(Gender, Person), map_gender_guess))) %>%
                   mutate(relationship_gender = unlist(pmap(list(`What gender do you identify with?`, gender_guess), map_gender_relationships)))
more_places_guess %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender")

more_places_guess %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender (Guess)")

f_f <- sum(more_places_guess$relationship_gender == "F-F")
m_m <- sum(more_places_guess$relationship_gender == "M-M")

m_f <- sum(more_places_guess$relationship_gender == "M-F")
f_m <- sum(more_places_guess$relationship_gender == "F-M")

more_places_table <- as.table(rbind(c(m_m, m_f), c(f_m, f_f)))

dimnames(more_places_table) <- list(gender_relationship = c("M", "F"),
                    gender_participant = c("M", "F"))

Xsq <- chisq.test(more_places_table)
Xsq$observed   # observed counts (same as M)
##                    gender_participant
## gender_relationship  M  F
##                   M  2  5
##                   F 16 35
Xsq$expected   # expected counts under the null
##                    gender_participant
## gender_relationship         M         F
##                   M  2.172414  4.827586
##                   F 15.827586 35.172414
Xsq$residuals  # Pearson residuals
##                    gender_participant
## gender_relationship           M           F
##                   M -0.11697707  0.07847060
##                   F  0.04333758 -0.02907173
Xsq$stdres     # standardized residuals
##                    gender_participant
## gender_relationship          M          F
##                   M -0.1502151  0.1502151
##                   F  0.1502151 -0.1502151
Xsq$p.value
## [1] 1
mosaicplot(more_places_table, las = 1, shade=TRUE)

Geocoding for Place Type Using OpenStreetMapData (OSM)

Example Function for Looking Up OSM Type

lookup_osm <- function (address, lat, lon) {
  
  url <- paste("http://nominatim.openstreetmap.org/search?format=json&q=", gsub(" ", "+", address), "&format=json", sep="")
  
  
  place_type <- tryCatch({
    res <- GET(url)
    data <- content(res,"text")
    if (!is.null(data)) {
       place_data <- jsonlite::fromJSON(data)
       toString(place_data$type)
      }
}, error = function(e) {
    "ERROR"
})


 
  if (is.null(place_type) || place_type == "ERROR" )
  {
    url <- paste("http://nominatim.openstreetmap.org/reverse?format=json&lat=", lat, "&lon=", lon, "&zoom=18&addressdetails=1", sep="")
    
     place_type <- tryCatch({
      res <- GET(url)
      data <- content(res,"text")
      if (!is.null(data)) {
         place_data <- jsonlite::fromJSON(data)
         place_data$type[1]
      }
      }, ERROR = function(e) {
        "ERROR"
      })
  }

  place_type

  
}

This is an example of how to run the above function, I chunked the data because otherwise it would time out occasionally. The rest of the notebook imports the osm_data.csv file, which has been computed using this code.

# replace the above line with ```{r} if you want to run this chunk

 osm_data <- head(gis_gender_data_guess,1) %>%
   mutate(place_type = unlist(pmap(list(ADDRESS_1, LAT, LON), lookup_osm)))
 osm_data



chunks <- ggplot2::cut_interval(1:nrow(gis_gender_data_guess), length=20, labels=FALSE)

unique(chunks)

for (i in unique(chunks))
{

  new_chunk <- gis_gender_data_guess[which(chunks==i),] %>%
       mutate(place_type = unlist(pmap(list(ADDRESS_1, LAT, LON), lookup_osm)))

  osm_data <- rbind(osm_data, new_chunk)
}

osm_data

Import OSM Place Type Data from File

osm_data <- read.csv("osm_data.csv")
osm_data
##       X.1    X SOCIALNETW                                           CHECK
## 1       1    1       1025                                         Correct
## 2       2    2       1319                                         Correct
## 3       3    3       1320                                         Correct
## 4       4    4       1321                                         Correct
## 5       5    5       1322                                         Correct
## 6       6    6        459                                         Correct
## 7       7    7        460                                         Correct
## 8       8    8         87                                         Correct
## 9       9    9         88                                         Correct
## 10     10   10         89                                         Correct
## 11     11   11        486 Different from Google - needs to be re-geocoded
## 12     12   12        487                                         Correct
## 13     13   13        488                                         Correct
## 14     14   14        926                                         Correct
## 15     15   15        927                                    Clio Updated
## 16     16   16        928                                         Correct
## 17     17   17        995                                         Correct
## 18     18   18        996                                         Correct
## 19     19   19        997                                            <NA>
....

Example Visualization Using OSM Place Type

library(RColorBrewer)

filtered_osm <- osm_data  %>%
  filter(place_type != "yes" & place_type != "unclassified" & place_type != "primary" & place_type != "secondary"  &  place_type != "tertiary" & place_type != "parking") %>%
  mutate(place_type = ifelse(place_type=="residential" | place_type=="house", "residential or house", place_type),
         RELATIONSH = ifelse(RELATIONSH=="Friend", "Friendship", RELATIONSH)) %>%
  filter(place_type != "residential or house")

top_7_place_types <- filtered_osm %>%
  count(place_type, sort = TRUE) %>%
  slice(1:7) %>% left_join(filtered_osm)


p1 <- top_7_place_types %>%
  ggplot(aes(x=RELATIONSH, fill=place_type, data_id=place_type)) +
   scale_fill_brewer(palette = "Dark2") +
  geom_bar_interactive(position = "dodge") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0))

p2 <- sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point_interactive(data=top_7_place_types, alpha=0.5, aes(x=LON, y=LAT, color=place_type, data_id=place_type, tooltip = UNIQUE)) +
   scale_color_brewer(palette = "Dark2") +
   theme(axis.text.x = element_text(angle = 90, vjust = 0))



hover_css <- "
  filter: brightness(75%);
  cursor: pointer;
  transition: all 0.5s ease-out;
  filter: brightness(1.15);
"


combined_plot <- p1 / p2 + plot_layout(nrow = 2)

interactive_plot <- girafe(ggobj = combined_plot) %>%
                    girafe_options(
                      opts_hover(css = hover_css),
                     opts_hover_inv(css = "opacity:0.1; transition: all 0.2s ease-out;"),
                     opts_sizing(rescale = TRUE)
                    )

interactive_plot
htmltools::save_html(interactive_plot, "interactive_map2.html")

Earlier Exploratory Data Visualizations

Gender Barcharts

gis_gender_data %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender")

gis_gender_data_guess %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender (Guess)")

gis_gender_data_guess %>%
   ggplot(aes(x=relationship_gender)) +
   geom_bar(fill="darkblue") +
   labs(title = "Relationships by Gender (Guess)")

Gender Maps (Non-Interactive)

gis_gender_data_guess_filtered <- gis_gender_data_guess %>%  filter(LAT > 31.7 & LAT < 32.3 & LON>  -81.4 & LON < -80.9)

gis_gender_data_guess_filtered 
##      SOCIALNETW                                           CHECK SOCIALNE_1
## 1          1025                                         Correct      10071
## 2          1319                                         Correct      10095
## 3          1320                                         Correct      10095
## 4          1321                                         Correct      10095
## 5          1322                                         Correct      10095
## 6           459                                         Correct      10049
## 7           460                                         Correct      10049
## 8            87                                         Correct      10048
## 9            88                                         Correct      10048
## 10           89                                         Correct      10048
## 11          486 Different from Google - needs to be re-geocoded      10064
## 12          487                                         Correct      10064
## 13          488                                         Correct      10064
## 14          926                                         Correct      10072
## 15          927                                    Clio Updated      10072
## 16          928                                         Correct      10072
## 17          995                                         Correct      10071
## 18          996                                         Correct      10071
## 19          997                                            <NA>      10071
....
sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_gender_data_guess_filtered %>% filter(relationship_gender == "F-F" | relationship_gender == "F-M" |relationship_gender == "M-F"  | relationship_gender == "M-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE)) +
  facet_grid(. ~ relationship_gender)

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "F-F"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "F-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "M-F"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_gender_data_guess %>% filter(relationship_gender == "M-M"), alpha=0.5, aes(x=LON, y=LAT, color=relationship_gender, text=UNIQUE))

Example of Attempted Age Coding With Visualization

# Difficult to assume younger-older or older-younger for most relationships in the data
map_age_relationships <- function(relationship) {
  if (!is.na(relationship))
 {
    if (relationship == "Aunt" | relationship == "Father" | relationship == "Grandparent" | relationship == "Mother" | relationship == "Uncle" | relationship == "In Law")
    {
      "Y - O"
  
    } else if (relationship == "Daughter" | relationship == "Son" | relationship == "Niece" | relationship == "Nephew" | relationship == "Grandchild")
    {
      "O - Y"
    } else if (relationship == "Family" | relationship == "Cousin" | relationship ==  "Husband" | relationship ==  "Wife" | relationship == "Brother" | relationship == "Sister" | relationship == "Boyfriend" |
               relationship == "Boss" | relationship == "Brother" | relationship == "Brother in Law"  | relationship == "Church Relationship" | relationship == "Cousin"    | relationship ==  "Coworker"  | relationship == "Ex Partner" | relationship == "Friend" | relationship ==  "Group"  | relationship ==  "Partner"   | relationship == "Pastor" ){
      "S - S"
    }
  
  } else {
      "UNDEFINED"
  }
}
 gis_demographic_data
##      SOCIALNETW                                           CHECK SOCIALNE_1
## 1          1025                                         Correct      10071
## 2          1319                                         Correct      10095
## 3          1320                                         Correct      10095
## 4          1321                                         Correct      10095
## 5          1322                                         Correct      10095
## 6           459                                         Correct      10049
## 7           460                                         Correct      10049
## 8            87                                         Correct      10048
## 9            88                                         Correct      10048
## 10           89                                         Correct      10048
## 11          486 Different from Google - needs to be re-geocoded      10064
## 12          487                                         Correct      10064
## 13          488                                         Correct      10064
## 14          926                                         Correct      10072
## 15          927                                    Clio Updated      10072
## 16          928                                         Correct      10072
## 17          995                                         Correct      10071
## 18          996                                         Correct      10071
## 19          997                                            <NA>      10071
....
 unique( gis_demographic_data$SOCIALNE_3)
##  [1] "Aunt"                "Boss"                "Boyfriend"          
##  [4] "Brother"             "Brother in Law"      "Church Relationship"
##  [7] "Cousin"              "Coworker"            "Daughter"           
## [10] "Ex Partner"          "Family"              "Father"             
## [13] "Friend"              "Grandchild"          "Grandparent"        
## [16] "Group"               "Husband"             "In Law"             
## [19] "Mother"              "Nephew"              "Niece"              
## [22] "Partner"             "Pastor"              "Sister"             
## [25] "Son"                 "Uncle"               "Wife"               
## [28] NA
gis_age_data_guess <- gis_demographic_data %>%
  mutate(age_guess = unlist(pmap(list(SOCIALNE_3), map_age_relationships))) 

# one small discovery: no "girlfriend" relationship in the dataset
filter(gis_age_data_guess, gis_age_data_guess$SOCIALNE_3 == "Girlfriend")
##  [1] SOCIALNETW                        CHECK                            
##  [3] SOCIALNE_1                        SOCIALNE_2                       
##  [5] PERSON_WIT                        SOCIALNE_3                       
##  [7] NUMBER_OF_                        RELATIONSH                       
##  [9] GENDER                            RELATION_1                       
## [11] LONG_LIST                         SOCIALNE_4                       
## [13] SOCIALNE_5                        SOCIALNE_6                       
## [15] SOCIALNE_7                        UNIQUE                           
## [17] OBJECTID                          FIELD1                           
## [19] ADDRESS                           PRECISION                        
## [21] LAT                               LON                              
## [23] ADDRESS_1                         What gender do you identify with?
## [25] age_guess                        
## <0 rows> (or 0-length row.names)
gis_age_data_guess_filtered <- gis_age_data_guess %>%  filter(LAT > 31.7 & LAT < 32.3 & LON>  -81.4 & LON < -80.9)

gis_age_data_guess_filtered 
##      SOCIALNETW                                           CHECK SOCIALNE_1
## 1          1025                                         Correct      10071
## 2          1319                                         Correct      10095
## 3          1320                                         Correct      10095
## 4          1321                                         Correct      10095
## 5          1322                                         Correct      10095
## 6           459                                         Correct      10049
## 7           460                                         Correct      10049
## 8            87                                         Correct      10048
## 9            88                                         Correct      10048
## 10           89                                         Correct      10048
## 11          486 Different from Google - needs to be re-geocoded      10064
## 12          487                                         Correct      10064
## 13          488                                         Correct      10064
## 14          926                                         Correct      10072
## 15          927                                    Clio Updated      10072
## 16          928                                         Correct      10072
## 17          995                                         Correct      10071
## 18          996                                         Correct      10071
## 19          997                                            <NA>      10071
....
sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_age_data_guess_filtered %>% filter(age_guess != "UNDEFINED"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE)) +
  facet_grid(. ~ age_guess)

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_age_data_guess %>% filter(age_guess == "Y - O"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE))

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_age_data_guess %>% filter(age_guess == "O - Y"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE))

sav_map %>%
  ggplot() +
  geom_sf() +
  geom_point(data=gis_age_data_guess %>% filter(age_guess == "S - S"), alpha=0.5, aes(x=LON, y=LAT, color=RELATIONSH, text=UNIQUE))

Barchart About Various Ages

all_data %>%
   ggplot(aes(x=`What is your age range?`)) +
   geom_bar(fill="darkblue") +
   labs(title = "Participants by Age Range",
        x = "Age Range", y = "Number of Participants")

all_data$`What race/ethnicity do you identify with?`
##  [1] "Black / African American" "Other"                   
##  [3] "White"                    "Black / African American"
##  [5] "Black / African American" "Black / African American"
##  [7] "White"                    "Other"                   
##  [9] "Black / African American" "White"                   
## [11] "White"                    "Black / African American"
## [13] "Black / African American" "White"                   
## [15] "Black / African American" "Black / African American"
## [17] "Black / African American" "Black / African American"
## [19] "Black / African American" "Black / African American"
## [21] "White"                    "Black / African American"
## [23] "Black / African American" "Black / African American"
## [25] "White"                    "Black / African American"
## [27] "White"                    "Other"                   
## [29] "Black / African American" "Black / African American"
## [31] "Black / African American" "Black / African American"
## [33] "Black / African American" "White"                   
## [35] "Black / African American" "Black / African American"
## [37] "Black / African American" "Other"                   
## [39] "White"                    "Black / African American"
....
all_data %>%
   ggplot(aes(x=`What race/ethnicity do you identify with?`)) +
   geom_bar(fill="darkblue") +
   labs(title = "Participants by Race",
        x = "Gender", y = "Number of Participants")

all_data %>%
   ggplot(aes(fill=`Disabled Community?`, x=`What is your age range?`)) +
   geom_bar() +
   labs(title = "Participants by Age Range",
        x = "Age Range", y = "Number of Participants")

all_data %>%
   ggplot(aes(fill=`LGBTQ+ Community?`, x=`What is your age range?`)) +
   geom_bar() +
   labs(title = "Participants by Age Range",
        x = "Age Range", y = "Number of Participants")

# Exploratory data analysis: car ownership

all_data %>%
   ggplot(aes(x=`What is your age range?`, fill=`Do you drive?`)) +
   geom_bar() +
   labs(title = "Participants by Age Range, Grouped by Driving",
        x = "Age Range", y = "Number of Participants")

all_data %>%
   ggplot(aes(x=`What is your age range?`, fill=`Do you own or have access to a car?`)) +
   geom_bar() +
   labs(title = "Participants by Age Range, Grouped by Car Ownership",
        x = "Age Range", y = "Number of Participants")

all_data %>%
   ggplot(aes(x=`What race/ethnicity do you identify with?`, fill=`Do you drive?`)) +
   geom_bar() +
   labs(title = "Participants by Age Range, Grouped by Driving",
        x = "Age Range", y = "Number of Participants")

all_data %>%
   ggplot(aes(x=`What race/ethnicity do you identify with?`, fill=`Do you own or have access to a car?`)) +
   geom_bar() +
   labs(title = "Participants by Age Range, Grouped by Car Ownership",
        x = "Age Range", y = "Number of Participants")

Mosaic Plots on Demographics versus Driving

race_and_driving_table <- table(all_data$`What race/ethnicity do you identify with?`, all_data$`Do you drive?`)
mosaicplot(race_and_driving_table, las = 1, shade=TRUE, main="Participants by Race and Driving")

race_and_car_table <- table(all_data$`What race/ethnicity do you identify with?`, all_data$`Do you own or have access to a car?`)
mosaicplot(race_and_car_table, las = 1, shade=TRUE, main="Participants by Race and Car Ownership")

age_and_driving_table <- table(all_data$`What is your age range?`, all_data$`Do you drive?`)
mosaicplot(age_and_driving_table, las = 1, shade=TRUE, main="Participants by Age and Driving")

age_and_car_table <- table(all_data$`What is your age range?`, all_data$`Do you own or have access to a car?`)
mosaicplot(age_and_car_table, las = 1, shade=TRUE, main="Participants by Age and Car Ownership")

disability_and_driving_table <- table(all_data$`Disabled Community?`, all_data$`Do you drive?`)
mosaicplot(disability_and_driving_table, las = 1, shade=TRUE, main="Participants by Disability and Driving")

disability_and_car_table <- table(all_data$`Disabled Community?`, all_data$`Do you own or have access to a car?`)
mosaicplot(disability_and_driving_table, las = 1, shade=TRUE, main="Participants by Disability and Car Ownership")

chisq.test(race_and_driving_table)
## 
##  Pearson's Chi-squared test
## 
## data:  race_and_driving_table
## X-squared = 1.5951, df = 2, p-value = 0.4504
chisq.test(race_and_car_table)
## 
##  Pearson's Chi-squared test
## 
## data:  race_and_car_table
## X-squared = 4.511, df = 2, p-value = 0.1048
chisq.test(age_and_driving_table)
## 
##  Pearson's Chi-squared test
## 
## data:  age_and_driving_table
## X-squared = 1.1706, df = 3, p-value = 0.7601
chisq.test(age_and_car_table)
## 
##  Pearson's Chi-squared test
## 
## data:  age_and_car_table
## X-squared = 1.8241, df = 3, p-value = 0.6097
chisq.test(disability_and_driving_table)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  disability_and_driving_table
## X-squared = 2.3478, df = 1, p-value = 0.1255
chisq.test(disability_and_car_table)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  disability_and_car_table
## X-squared = 3.7105, df = 1, p-value = 0.05407

Mosaic Plots on Age versus Disability, LGBTQ+ Community Membership

age_and_disability_table <- table(all_data$`What is your age range?`, all_data$`Disabled Community?`)
mosaicplot(age_and_disability_table, las = 1, shade=TRUE, main="Participants by Disability Community and Age")

chisq.test(age_and_disability_table)
## 
##  Pearson's Chi-squared test
## 
## data:  age_and_disability_table
## X-squared = 9.4657, df = 3, p-value = 0.0237
age_and_disability_table <- table(all_data$`What is your age range?`, all_data$`LGBTQ+ Community?`)
mosaicplot(age_and_disability_table, las = 1, shade=TRUE, main="Participants by LGBTQ+ Community and Age")

chisq.test(age_and_disability_table)
## 
##  Pearson's Chi-squared test
## 
## data:  age_and_disability_table
## X-squared = 3.0683, df = 3, p-value = 0.3812

Some Word Cloud Visualizations

# Some issues, words with different meanings (for example, park like the place versus the verb)
data(stop_words)

comparison_cloud <- function (variable) {
    responses <- all_data %>% select(`ID Number`, `Disabled Community?`,  variable)
    words <- responses %>% unnest_tokens(word, variable) 
    words <- words %>% filter(!(word %in% stop_words$word))
    
    words_disabled_community <- words %>% filter(`Disabled Community?` == "Yes")
    words_not_disabled_community <- words %>% filter(`Disabled Community?` == "No")
  
    
    tdm_both <- TermDocumentMatrix(c(words_disabled_community$word, words_not_disabled_community$word), control = list(stemming = TRUE, stopwords = TRUE))
    
    
    tdm_both <- as.matrix(tdm_both)
    tdm_yes_sum <- rowSums(tdm_both[,1:nrow(words_disabled_community)])
    tdm_no_sum <- rowSums(tdm_both[,nrow(words_disabled_community):(nrow(words_disabled_community)+nrow(words_not_disabled_community))])
    
    tdm_both_sum <- cbind(tdm_yes_sum, tdm_no_sum)
    colnames(tdm_both_sum) <- c("Disabled Community - Yes", "Disabled Community - No")
    
    comparison.cloud(tdm_both_sum, random.order = FALSE,
                     colors = c("black", "red"), max.words = 100, scale=c(2.2, 0.44))
    commonality.cloud(tdm_both_sum, random.order = FALSE,
                 colors = brewer.pal(8, "Dark2"), max.words = 100)

}


comparison_cloud("Places / Features / Activities")

comparison_cloud("First names & Relationship")

comparison_cloud("Are there other ways infrastructure changes in the city have impacted your social life?")

comparison_cloud("Has the city changed in a way that has made it easier for you to spend time with the people in your life?")

comparison_cloud("Please consider some changes to the city that have been made. Has the city changed in a way that has made it harder for you to spend time with the people in your life?")

comparison_cloud("Do you know anyone who has voiced an opinion or expressed some feelings about how they may or may not have places to go with others? Who was it and what have they said?")

Charts of POI Counts by Demographics

all_data_counts <- all_data %>%
  mutate(relationships_count = lengths(strsplit(`First names & Relationship`, split=",")), poi_count = lengths(strsplit(`Places / Features / Activities`, split=","))) 

all_data_counts %>% 
  ggplot(aes(x=relationships_count, y=poi_count, color=`Disabled Community?`)) +
  geom_point()

all_data_counts %>% 
  ggplot(aes(x=relationships_count, y=poi_count, color=`Do you own or have access to a car?`)) +
  geom_point()

all_data_counts %>% 
  ggplot(aes(x=relationships_count, y=poi_count, color=`Do you drive?`)) +
  geom_point()

all_data_counts %>% 
  ggplot(aes(x=relationships_count, y=poi_count, color=`What is your age range?`)) +
  geom_point()

all_data_counts %>% 
  ggplot(aes(x=relationships_count, y=poi_count, color=`What race/ethnicity do you identify with?`)) +
  geom_point()

count_by_demographic <- function (demographic) 
{
  all_data_counts %>% 
    group_by(across(all_of(demographic))) %>%
    summarize(avg_relationships = mean(relationships_count), avg_poi = mean(poi_count))
}


by_disability_community <- count_by_demographic(c("Disabled Community?")) 
by_disability_community %>%
   ggplot(aes(x=`Disabled Community?`, y=avg_poi, fill=avg_relationships)) +
   geom_bar(stat='identity') +
   labs(title = "POI Count by Disability Community Membership")

by_car_ownership <- count_by_demographic(c("Do you own or have access to a car?")) 
by_car_ownership %>%
   ggplot(aes(x=`Do you own or have access to a car?`, y=avg_poi, fill=avg_relationships)) +
   geom_bar(stat='identity') +
   labs(title = "POI Count by Car Ownership")

by_driving <- count_by_demographic(c("Do you drive?")) 
by_driving %>%
   ggplot(aes(x=`Do you drive?`, y=avg_poi, fill=avg_relationships)) +
   geom_bar(stat='identity') +
   labs(title = "POI Count by Driving")

by_race <- count_by_demographic(c("What race/ethnicity do you identify with?")) 
by_race %>%
   ggplot(aes(x=`What race/ethnicity do you identify with?`, y=avg_poi, fill=avg_relationships)) +
   geom_bar(stat='identity') +
   labs(title = "POI Count by Race")

by_age <- count_by_demographic(c("What is your age range?")) 
by_age %>%
   ggplot(aes(x=`What is your age range?`, y=avg_poi, fill=avg_relationships)) +
   geom_bar(stat='identity') +
   labs(title = "POI Count by Age")